/*
Create sample for primary analysis 

*/


* Start from the individual-level head/spouse file and attach every wave's
* family file, keyed on that wave's interview number (intnumYY).
use "U:\dtafiles\ind_head_spouse.dta", clear

* Prefix all age* variables already in the file with "ageind"; the family
* files merged below are what later supply the age* stubs used in the reshape.
rename age* ageind*

* Waves whose suffix is a plain two-digit number: 69-97, 99, then 11-19.
* (Same merge order as before; _merge is never kept.)
foreach wave of numlist 69/97 99 11(2)19 {
	merge m:1 intnum`wave' using "U:\dtafiles\fam`wave'.dta", nogenerate
}

* Waves 01-09 use a zero-padded suffix in both the key and the file name.
foreach wave in 01 03 05 07 09 {
	merge m:1 intnum`wave' using "U:\dtafiles\fam`wave'.dta", nogenerate
}

* Rows with no person number are dropped (presumably family records that
* matched no individual in the master file -- confirm).
drop if ER30002 == .

* Strip the leading zero from the 01-09 wave suffixes (e.g. age01 -> age1) so
* that every wave suffix is a plain number usable as the reshape j index.
foreach wave of numlist 1(2)9 {
	foreach stub in intnum seq relhead age sex agespouse race racespouse educ ageind state maritalstatus inhome husband_inc wife_inc {
		rename `stub'0`wave' `stub'`wave'
	}
}

* Move the un-suffixed sex variable out of the way: "sex" is used as a
* reshape stub below and would otherwise clash with it.
rename sex sex_ind

* Wide -> long: one row per (intnum68, ER30002) per wave.
reshape long intnum seq relhead age sex agespouse ageind educ race racespouse state maritalstatus inhome husband_inc wife_inc, i(intnum68 ER30002) j(year)

* Convert the two-digit wave suffix to a calendar year:
* suffixes above 60 belong to the 1900s, suffixes below 60 to the 2000s.
replace year = cond(year > 60, year + 1900, cond(year < 60, year + 2000, year))

* Drop person-years with interview number 0 and everything before 1970.
drop if intnum == 0 | year < 1970



* Attach the respondent's own state/county by interview number and year.
* Location rows that match no person-year are not kept.
merge m:1 intnum year using "U:\dtafiles\state_county_clean.dta", keep(master match) nogenerate

* Tag the location fields and the person identifiers as the respondent's, so
* the un-prefixed names are free for the parent-location merges that follow.
rename (state_fips county_fips statecounty ER30001 ER30002) ///
       (resp_state_fips resp_county_fips resp_statecounty resp_ER30001 resp_ER30002)

*** Merge on each parent's location (mother first, then father)
* For each parent: temporarily expose the parent's identifiers under the key
* names used by the location file, merge by person and year, then put the
* parent prefix back on both the identifiers and the location fields.
* The merge indicator is kept as mother_merge / father_merge.
foreach parent in mother father {
	rename (`parent'_ER30001 `parent'_ER30002) (ER30001 ER30002)

	* Location rows that match no person-year are not kept.
	merge m:1 ER30001 ER30002 year using "U:\dtafiles\indid_famid_location.dta", ///
		keep(master match) generate(`parent'_merge)

	rename (state_fips county_fips statecounty ER30001 ER30002) ///
	       (`parent'_state_fips `parent'_county_fips `parent'_statecounty `parent'_ER30001 `parent'_ER30002)
}

 * Flag and keep heads and spouses only (relhead codes 1, 2, 10, 20, 22), and
 * require seq <= 2 so that a person moving out of a household in the same
 * year is not kept.
 g headspouse = inlist(relhead, 1, 2, 10, 20, 22)
keep if headspouse == 1 & seq <= 2

* Running index of each person within household-year (1, 2, ...).
* Note this is a position within the group, not the household size.
bys intnum year: g numper = _n




*** Assign the parent location at the household level, based on whichever
*** spouse has an attached location for their parent (it is never both,
*** because that would require two PSID respondents married to each other).

* Household-year level parent location: the minimum over the household's
* members, which picks up the one non-missing value when only one spouse has it.
bys intnum year: egen mother_statecountyadj = min(mother_statecounty)
bys intnum year: egen father_statecountyadj = min(father_statecounty)

* sex_ind of the household member whose parent location is known
* (0 if nobody in the household has one).
bys intnum year: egen mother_gender= max(sex_ind*(mother_statecounty != .))
bys intnum year: egen father_gender = max(sex_ind*(father_statecounty != .))

bys intnum year: egen mother_stateadj = min(mother_state_fips)
bys intnum year: egen father_stateadj = min(father_state_fips)

* Same-county / same-state indicators.
* In Stata "." == "." evaluates to true, so without the !missing() guard a
* respondent with no recorded location and no located parent would be coded
* as living in the same county/state as that parent. The guard makes every
* indicator 0 whenever the respondent's own location is unknown, matching how
* a missing parent location is already treated when the respondent's is known.
g same_county_parent = !missing(resp_statecounty) & (resp_statecounty == mother_statecountyadj | resp_statecounty == father_statecountyadj)
g same_county_mother = !missing(resp_statecounty) & resp_statecounty == mother_statecountyadj
g same_county_father = !missing(resp_statecounty) & resp_statecounty == father_statecountyadj

g same_state_parent = !missing(resp_state_fips) & (resp_state_fips == mother_stateadj | resp_state_fips == father_stateadj)
g same_state_mother = !missing(resp_state_fips) & resp_state_fips == mother_stateadj
g same_state_father = !missing(resp_state_fips) & resp_state_fips == father_stateadj


* Indicator for the calendar year of the first birth.
g firstbirthind = (year == year_firstchild)

* Incomes by gender. "sex" here is the family-file variable from the reshape
* (presumably the head's sex -- confirm): when sex == 1 the woman's income is
* wife_inc and the man's is husband_inc; when sex == 2 the roles are swapped.
* Any other value of sex leaves both incomes missing.
g inc_f = cond(sex == 1, wife_inc, husband_inc) if inlist(sex, 1, 2)
g inc_m = cond(sex == 1, husband_inc, wife_inc) if inlist(sex, 1, 2)

* Race of the woman: taken from racespouse when sex == 1 and racespouse is
* recorded, and from race when sex == 2 or racespouse is missing. It is left
* missing only when sex is neither 1 nor 2 and racespouse is recorded.
g white_f = cond(racespouse == . | sex == 2, race == 1, cond(sex == 1, racespouse == 1, .))
g black_f = cond(racespouse == . | sex == 2, race == 2, cond(sex == 1, racespouse == 2, .))

* Education dummies. educ of 0, above 90, or missing yields a missing dummy
* (values above 90 are presumably "don't know"/NA codes -- confirm).
g college = (educ >= 16) if educ <= 90 & educ != 0
g hsdegree = (educ >= 12 & educ <= 15) if educ <= 90 & educ != 0

* Deflate incomes and apply the income / age sample restrictions.

* cpigen is not a built-in command (presumably the user-written CPI command --
* confirm it is installed); the code below relies on it creating cpiu.
cpigen
* Values of cpiu set by hand for the most recent survey years.
replace cpiu = 1.352828 if year == 2013
replace cpiu = 1.376405 if year == 2015
replace cpiu = 1.423461 if year == 2017
replace cpiu = 1.484652 if year == 2019


g inc_f_adj = inc_f/cpiu
g inc_m_adj = inc_m/cpiu


* Age of the woman: agespouse when sex == 1, age when sex == 2.
g age_f = agespouse if sex == 1
replace age_f = age if sex == 2

* Drop implausibly large female incomes.
* Missing values compare greater than any number in Stata, so the unguarded
* "inc_f > 1500000" also dropped every person-year with missing inc_f. The
* inc_f_pos line below explicitly handles missing inc_f, so those rows are
* meant to survive; the !missing() guard keeps them.
drop if inc_f > 1500000 & !missing(inc_f)

* Indicator for female income above 100; missing when income is missing.
g inc_f_pos = inc_f > 100 if !missing(inc_f)

* Restrict to women aged 18-55 (rows with missing age_f are dropped as well).
keep if age_f >= 18 & age_f <= 55

* Event window: five years before to ten years after the first birth.
g year_fiveprebirth = year_firstchild - 5
g year_tenpostbirth = year_firstchild + 10

g insample = (year >= year_fiveprebirth & year <= year_tenpostbirth)
keep if insample == 1

** Counting number of years
* Person identifier built from the respondent's two ID variables.
egen uniqid = group(resp_ER30001 resp_ER30002)

* nyears = number of distinct years observed per person: flag the first row
* of each person-year, take the running sum within person, then spread the
* person's final total to all of that person's rows.
bysort uniqid year: gen nyears = (_n == 1)
by uniqid: replace nyears = sum(nyears)
by uniqid: replace nyears = nyears[_N]

** Pre- and post-birth parts of the window (the birth year is in neither).
g preperiod = (year >= year_fiveprebirth & year < year_firstchild)
g postperiod = (year > year_firstchild & year <= year_tenpostbirth)

* Same counting recipe, restricted to the pre- and post-period years.
* sum() treats the missing flags outside the period as zero.
foreach window in pre post {
	bysort uniqid `window'period year: gen n`window'years = (_n == 1) if `window'period == 1
	by uniqid: replace n`window'years = sum(n`window'years)
	by uniqid: replace n`window'years = n`window'years[_N]
}

* Alternative sample definitions based on how many years are observed.
g insample2a = (nyears >= 8 & npreyears > 1 & npostyears > 1)
g insample2 = (nyears >= 8 & npreyears > 2 & npostyears > 1)
g insample4 = (nyears >= 12 & npreyears > 1 & npostyears > 1)
 * Event-time dummies relative to the year of first birth.
 g event0 = (year == year_firstchild)

 * eventpreK = 1 in the year K years before the birth (K = 1..5).
 forvalues k = 1/5 {
 	g eventpre`k' = (year_firstchild - year == `k')
 }

 * eventpostK = 1 in the year K years after the birth (K = 1..10).
 forvalues k = 1/10 {
 	g eventpost`k' = (year - year_firstchild == `k')
 }

 * Keep the true t-2 dummy as eventpre_orig, then overwrite eventpre2 with a
 * constant 1 (presumably so it drops out of regressions as the reference
 * period -- confirm). Anything that needs the real t-2 indicator after this
 * point must use eventpre_orig.
 g eventpre_orig = eventpre2
 replace eventpre2 = 1

 * Age at first birth, computed from the family-level age variable.
 g age_firstchild = year_firstchild - (year - age)
  
 * Person-level characteristics measured in the year of first birth.
 * Each flag is set to 1 only on the birth-year row where the condition holds:
 *   grandparent_birth   - same county as a parent
 *   nograndparent_birth - not in the same state as either parent
 *   married_birth       - maritalstatus == 1
 g grandparent_birth = 1 if same_county_parent == 1 & event0 == 1
 g nograndparent_birth = 1 if same_state_parent == 0 & event0 == 1
 g married_birth = 1 if maritalstatus == 1 & event0 == 1

 * Spread each flag to all rows of the person: sorting on the flag puts the
 * non-missing value (if any) first within person, so [1] picks it up.
 * People for whom the flag was never set get 0.
 foreach flag in grandparent_birth nograndparent_birth married_birth {
 	bysort uniqid (`flag'): replace `flag' = `flag'[1]
 	replace `flag' = 0 if `flag' == .
 }

 * Indicators for the household having no located mother / father.
 g mother_miss = (mother_statecountyadj == .)
 * replace mother_miss = 1 if mother_ER30002 > 900
 g father_miss = (father_statecountyadj == .)
 *replace father_miss = 1 if father_ER30002 > 900

 * Own deflated income by the individual's sex: the male series when
 * sex_ind == 1, the female series when sex_ind == 2, missing otherwise.
 g inc_adj = cond(sex_ind == 1, inc_m_adj, inc_f_adj) if inlist(sex_ind, 1, 2)
 
 
 
 * Distance (mi_to_county) between the respondent's county and each parent's
 * county, looked up in the county-pair file keyed on (county1, county2).
 * The respondent's county plays the role of county1 for both lookups.
 rename resp_statecounty county1

 foreach parent in mother father {
 	* Expose this parent's county under the key name used by the pair file.
 	rename `parent'_statecountyadj county2
 	merge m:1 county1 county2 using "U:\dtafiles\dist_counties.dta"
 	drop if _merge == 2
 	* Diagnostic: match status against whether the parent's county is missing.
 	tab _merge `parent'_miss
 	drop _merge

 	rename mi_to_county dist_`parent'
 	rename county2 `parent'_statecountyadj

 	* Same county => distance zero.
 	* The !missing() guard matters: "." == "." is true in Stata, so without
 	* it a respondent with no county AND no located parent would be given a
 	* distance of 0 and would look co-located with that parent downstream.
 	replace dist_`parent' = 0 if county1 == `parent'_statecountyadj & !missing(county1)
 }

 rename county1 resp_statecounty
 
 * Within 50 / 100 miles of either parent. A missing distance never
 * satisfies "<=", so these are 0 when neither distance is known.
 g lessthan50 = dist_mother <=50 | dist_father <= 50
 g lessthan100 = dist_mother <=100 | dist_father <= 100

 * Person-level versions measured in the year of first birth: set on the
 * birth-year row, spread to all of the person's rows, 0 if never set.
 foreach cut in 100 50 {
 	g lessthan`cut'_birth = 1 if lessthan`cut' == 1 & event0 == 1
 	bysort uniqid (lessthan`cut'_birth): replace lessthan`cut'_birth = lessthan`cut'_birth[1]
 	replace lessthan`cut'_birth = 0 if lessthan`cut'_birth == .
 }

 * Distance to the nearer parent. min() skips missing arguments, so this is
 * the one known distance when only one parent is located, and missing when
 * neither is.
 g mindistparent = min(dist_mother, dist_father)

 * Nearer-parent distance in the birth year, spread to all rows of the person.
 g dist_birth = mindistparent if event0 == 1
 bysort uniqid (dist_birth): replace dist_birth = dist_birth[1]

 * Same, measured on the eventpre_orig row (the original eventpre2, i.e. two
 * years before the birth).
 * NOTE(review): the variable name says "pre1" but the row used is t-2 -- confirm.
 g dist_birthpre1 = mindistparent if eventpre_orig == 1
 bysort uniqid (dist_birthpre1): replace dist_birthpre1 = dist_birthpre1[1]

 * dist_birth equal to 999 (presumably the distance file's code for more than
 * 500 miles, going by the variable name -- confirm).
 g greaterthan500_birth = (dist_birth == 999)
 
 * Distance-to-nearest-parent category in the year of first birth:
 *   1 = same county (0 miles)
 *   2 = (0, 25) miles
 *   3 = [25, 50) miles
 *   4 = 50 miles or more
 *   . = distance unknown
 * Fixes relative to the earlier version:
 *   - The bins used strict inequalities on both sides, so distances of
 *     exactly 25, 50 or 100 miles fell into no category. The lower bounds
 *     are now inclusive.
 *   - "dist_birth > 100" is true for missing values in Stata, so people with
 *     an unknown distance were assigned category 4. They now stay missing.
 * NOTE(review): the earlier code assigned code 4 to both the 50-100 and the
 * >100 bins; that coding is preserved so existing uses of dist_cat keep
 * working. If a separate 5th category for >= 100 miles was intended, split
 * it here.
 g dist_cat = .
 replace dist_cat = 1 if dist_birth == 0
 replace dist_cat = 2 if dist_birth > 0 & dist_birth < 25
 replace dist_cat = 3 if dist_birth >= 25 & dist_birth < 50
 replace dist_cat = 4 if dist_birth >= 50 & !missing(dist_birth)

	** Weighting for near and far from grandparent (near defined as < 25 miles)
	* A missing dist_cat never satisfies "<= 2", so people with unknown
	* distance are coded 0 here. That is unchanged from before, when they
	* were in category 4.
	* NOTE(review): confirm unknown-distance people belong in the "far" group.
		 g near_indicator = dist_cat <= 2

 
* Pre-birth log female income used as a control, measured on two rows:
*   inc_f_prebirth2 - the eventpre_orig row (the original eventpre2 dummy)
*   inc_f_prebirth1 - the eventpre1 row
* Each is spread to all rows of the person. Where no value is available the
* variable is filled with 999999 and a companion miss_preincf* flag is set.
local row2 eventpre_orig
local row1 eventpre1
foreach k in 2 1 {
	g inc_f_prebirth`k' = log(inc_f_adj) if `row`k'' == 1
	bysort uniqid (inc_f_prebirth`k'): replace inc_f_prebirth`k' = inc_f_prebirth`k'[1]

	g miss_preincf`k' = (inc_f_prebirth`k' == .)
	replace inc_f_prebirth`k' = 999999 if miss_preincf`k' == 1
}

* Same missing-indicator treatment for the demographic controls: flag the
* missing values, then recode them to 99.
* NOTE(review): "white" and "black" are not created under those exact names
* in this script; they appear to resolve through Stata's variable-name
* abbreviation (to white_f / black_f) -- confirm no other variable begins
* with those names.
foreach ctrl in white black college {
	g miss_`ctrl' = (`ctrl' == .)
	replace `ctrl' = 99 if miss_`ctrl' == 1
}
g miss_hs = (hsdegree == .)
replace hsdegree = 99 if miss_hs == 1
 
 
	 * Weights for living near vs. far from a grandparent: model
	 * near_indicator with a probit, then weight each group by the mean
	 * predicted probability over its own predicted probability.
	 * miss* pulls in every missing-value flag that exists at this point.
	 * NOTE(review): "married", "white" and "black" rely on variable-name
	 * abbreviation -- confirm they resolve to the intended variables.
	 probit near_indicator married  college hsdegree white black   inc_f_prebirth* age_firstchild miss*
	 predict pi

	 * r(mean) from summarize is the average predicted probability.
	 summ pi
	 g grandparent_weight = cond(near_indicator == 1, `r(mean)'/pi, (1-`r(mean)')/(1-pi)) if inlist(near_indicator, 0, 1)
	 
	 
	 
	 
	*** Child Care Measures
 * Attach county-level child care measures by the respondent's county.
 * Child-care rows that match no person-year are not kept.
 merge m:1 resp_statecounty using "U:\dtafiles\childcare_clean.dta"
 drop if _merge == 2
 drop _merge

 * High-cost indicator: CC_centile at or above 75.
 * Missing values compare greater than any number in Stata, so the unguarded
 * "CC_centile >= 75" coded every county WITHOUT child-care data as 1.
 * The indicator is now left missing when CC_centile is missing.
 g above75CC = CC_centile >= 75 if !missing(CC_centile)
 g CC_in10s = MCToddler/10



 ** Weighting for high vs low CC
 * Probit of the high-cost indicator on the controls; each group is weighted
 * by the mean predicted probability over its own predicted probability.
 * Rows with missing above75CC drop out of the probit and get no weight.
 probit above75CC married  college hsdegree white black  inc_f_prebirth* age_firstchild miss*
	 predict pi2

	 g cc_weight =.
	 * Average predicted probability over rows where the high-cost
	 * indicator is observed, i.e. the rows that can receive a weight.
	 summ pi2 if !missing(above75CC)
	 replace cc_weight = `r(mean)'/pi2 if above75CC == 1
	 replace cc_weight = (1-`r(mean)')/(1-pi2) if above75CC == 0

 *** Aggregating the early pre-period years into a single indicator
 * The missing-income flags were only needed for the weighting probits.
drop miss_pre*

 * pre_period_omit1 pools event years -5 through -2;
 * pre_period_omit2 pools event years -5 through -3.
 * eventpre2 was overwritten with a constant 1 earlier in this script, so
 * using it here made pre_period_omit1 equal to 1 on every row. The untouched
 * copy of the t-2 dummy, eventpre_orig, is used instead.
 g pre_period_omit1 = eventpre5 == 1 | eventpre4 == 1 | eventpre3 == 1 | eventpre_orig == 1
 g pre_period_omit2  = eventpre5 == 1 | eventpre4 == 1 | eventpre3 == 1 


save "U:\dtafiles\penaltysample_v1.dta", replace